import requests
import json
import threading
from queue import Queue
import re

# Request headers sent with every HTTP call (browser-like UA to avoid
# trivial bot filtering).
header = {
    'accept': 'application/json',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'content-type': 'application/json',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.4.3505.400'
}
# HTTP proxy for all requests — assumes a local proxy is listening on
# 127.0.0.1:1080 (no https entry, so HTTPS traffic goes direct).
proxy = {"http":"http://127.0.0.1:1080"}

q = Queue()  # shared work queue: get_url() produces content URLs, download() consumes them

def get_url():
    """Crawl the EBI search API and enqueue per-entry content URLs.

    Walks 72 result pages (15 entries each, offsets 0, 15, 30, ...),
    builds the absolute content URL for every entry, and puts it on the
    shared queue ``q`` for the download worker. A failed page is
    reported and skipped; the crawl continues with the next page.
    """
    for page in range(72):
        # API paging offset; replaces the old hand-maintained counter.
        start = page * 15
        format_url = "https://www.ebi.ac.uk/ebisearch/ws/rest/coding_release/?query=hgcA&start={0}&size=15&format=JSON&fieldurl=true&viewurl=true&fields=name&hlfields=description&entryattrs=score".format(start)
        try:
            req = requests.get(url=format_url, headers=header, proxies=proxy, timeout=5).text
            data = json.loads(req)["entries"]
            for i in data:
                content_url = "https://www.ebi.ac.uk" + i["viewURLs"][0]["value"]  # absolute URL of the entry's raw content
                q.put(content_url)
        # Narrowed from a bare except: network errors, bad JSON, and an
        # unexpected response shape are the failures we expect here.
        except (requests.RequestException, ValueError, KeyError, IndexError):
            print(start, "失败")

def download():
    """Worker loop: fetch each queued URL and save it to ./文章/<id>.txt.

    Blocks forever on ``q.get()`` when the queue is empty (no sentinel,
    so this thread never exits on its own). The file name is the ``id``
    query parameter extracted from the URL; failed downloads are
    reported and skipped. ``count`` numbers attempts, successful or not.
    """
    # Compile once instead of on every iteration of the loop.
    id_pattern = re.compile(r'.+id=(.*?)&style=raw')
    count = 1
    while True:
        url = q.get()
        match = id_pattern.findall(url)
        if not match:
            # Previously findall(url)[0] raised an uncaught IndexError
            # here, silently killing the worker thread. Skip instead.
            print(url, "失败")
            continue
        filename = match[0]
        try:
            # .text is already a str; the old str() wrapper was redundant.
            text = requests.get(url=url, headers=header, proxies=proxy, timeout=10).text
            # ``with`` closes the file; the explicit f.close() was redundant.
            with open(r'./文章/{0}.txt'.format(filename), 'w', encoding='utf-8') as f:
                f.write(text)
            print("下载第{0}篇文章成功！".format(count))
        # Narrowed from a bare except: network failures and file-system
        # errors (e.g. missing ./文章 directory) are the expected cases.
        except (requests.RequestException, OSError):
            print(filename, "失败")
        count += 1


if __name__=='__main__':
    # Start the producer (get_url fills the queue) and the consumer
    # (download drains it) as two plain threads.
    # NOTE(review): download() loops forever and neither thread is a
    # daemon, so the process appears to never terminate once get_url
    # finishes — confirm whether this is intended; a queue sentinel or
    # daemon flag would allow a clean shutdown.
    t1 = threading.Thread(target=get_url)
    t2 = threading.Thread(target=download)
    t1.start()
    t2.start()

